#############################################################
# CONVERTS DATA FROM 2010 CENSUS TRACTS TO 2000 CENSUS UNITS
#############################################################

#conversion by areas of census tract shapefiles (used by Andam et al 2010)
# based on code written by Christoph Nolte
# questions to bowydenbraber@gmail.com

#library(xlsReadWrite)
library(foreign)
library(plyr)

# Load the table produced by the previous step of the workflow.
# NOTE(review): unlike the other inputs of this script, this path is not
# wrapped in ff() -- confirm that this is intended.
cu <- read.dbf("CU00_UC_coverage.dbf")


######################
# CONVERSION BY AREA
######################

# Load the attribute table of the overlap shapefile: one row per polygon
# obtained by merging 2000 census units with 2010 census tracts.
ol <- read.dbf(ff("Census_spatial/CU00MCT10.dbf"), as.is = TRUE)

# Keep only unit id, tract code and overlap area, under short names
ol <- setNames(
  ol[, c("CU_ID_50M", "CD_GEOCODI", "F_AREA")],
  c("cu00id", "ct10code", "area")
)

# Tract codes are handled as numbers from here on
ol$ct10code <- as.numeric(ol$ct10code)

# Distinct 2000 census unit ids present in the overlap table
cu00ids <- unique(ol$cu00id)

# Sort by tract code so that all overlaps of one tract are adjacent
ol <- ol[order(ol$ct10code), ]

# For each overlap, compute which share of its 2010 census tract it
# represents: the overlap area divided by the summed area of all overlaps
# that carry the same tract code.
#
# Rows are grouped by tract code in one vectorised pass instead of walking
# the sorted table and growing the result with c() once per tract (which is
# quadratic in the number of tracts). Grouping through match() also keeps
# rows whose tract code is NA together as one group, rather than aborting on
# an NA comparison, and does not depend on the row order of `ol`.
tract.group <- match(ol$ct10code, unique(ol$ct10code))
tract.area <- ave(ol$area, tract.group, FUN = sum)

# NOTE: shares are rounded to 3 decimals, so the shares of one tract need not
# add up to exactly 1, and overlaps below 0.0005 of their tract receive 0.
ol$area.perc <- round(ol$area / tract.area, digits = 3)

# Load the 2010 census variables that are to be converted to census units
d10 <- read.csv(ff("BraCens2010 - Key Data.csv"))

# Attach them to every overlap of the matching tract; overlaps whose tract
# code has no match in the census table are kept (their variables are NA)
ol <- merge(ol, d10, by.x = "ct10code", by.y = "code", all.x = TRUE)

# Columns to convert: everything from column "hh" through the last column
# of the merged table
first.aggr <- which(colnames(ol) == "hh")
col.aggr <- colnames(ol)[first.aggr:ncol(ol)]

# Convert variables: give each overlap its portion of the tract's
# people/households.
# Every converted column holds absolute counts, so a tract's value can be
# spread over its overlaps in proportion to area share.
for (colname in col.aggr) {
  # Missing counts are treated as zero
  filled <- ifelse(is.na(ol[[colname]]), 0, ol[[colname]])

  # Portion of the 2010 tract's count attributed to this overlap, stored in
  # a new column named "<colname>.f"
  ol[[paste0(colname, ".f")]] <- ol$area.perc * filled
}

# Aggregate (sum) variables by census units = obtain totals.
# Only the overlap area and the converted ".f" columns are summed; the unit
# id serves purely as the grouping key. Summing the id column as well, only
# to drop it afterwards, is wasted work and fails outright when the ids are
# not numeric (the overlap table is read with as.is = TRUE, so text id
# fields stay character).
cols.f <- paste0(col.aggr, ".f")
cu00 <- aggregate(
  ol[, c("area", cols.f), drop = FALSE],
  by = list(ol$cu00id),
  FUN = sum
)

# First column is the grouping key, followed by the summed area and the
# summed variables; the ".10" suffix marks values derived from 2010 data
colnames(cu00) <- c("cu00id", "area.10", paste0(col.aggr, ".10"))

# Save the per-unit totals, first as CSV (without row names), then as DBF
# NOTE(review): DBF field names are length-limited and dots may be altered
# on write -- confirm the ".10" column names survive in the .dbf output.
write.csv(cu00, file = ff("CU00_BraCens2010.csv"), row.names = FALSE)
write.dbf(cu00, file = ff("CU00_BraCens2010.dbf"))



